%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import cufflinks as cf
import plotly.offline as py
import plotly.graph_objs as go
# Load the survey responses; read_csv splits on commas by default.
df_mcr = pd.read_csv("mcr.csv")

# Put cufflinks and plotly into offline / notebook mode before any chart is drawn.
cf.go_offline()
py.init_notebook_mode()

# Preview the first 15 rows of the loaded table.
df_mcr.head(15)
import numpy as np
# Gender distribution: keep only the rows whose Q1 answer is "Male" or "Female".
mask1 = df_mcr["Q1"] == "Male"
mask2 = df_mcr["Q1"] == "Female"
gender_rows = df_mcr[mask1 | mask2]
gender_rows  # rendered as a table when this is the last expression of a notebook cell

# Count the answers per gender. The chart is rendered by plotly, so no
# matplotlib figure is opened here (it would only show up as an empty figure).
c = gender_rows["Q1"].value_counts()

# A plain list of traces is used instead of the deprecated go.Data wrapper.
data = [go.Bar(x=c.index, y=c.values, orientation="v")]
layout = go.Layout(height=800, title="Gender Distribution of the Survey")
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)
# Age distribution: Q2 answers without the first row and without missing values.
# NOTE(review): the first row is presumably the question text rather than a
# response — confirm against mcr.csv.
df_age = df_mcr["Q2"][1:].dropna()
a = df_age.value_counts()

# Light-blue bars with a darker outline.
age_bar = go.Bar(
    x=a.index,
    y=a.values,
    orientation="v",
    marker=dict(
        color='rgb(158,202,225)',
        line=dict(color='rgb(8,48,107)', width=1.5),
    ),
)

# A plain list of traces is used instead of the deprecated go.Data wrapper.
data = [age_bar]
layout = go.Layout(height=800, title="Age of the Survey's Participants")
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)
# Country distribution: Q3 answers without the first row. Answers equal to
# "I do not wish to disclose my location" are turned into NaN so that
# value_counts() leaves them out.
# Only the Q3 column is masked; comparing the whole DataFrame cell by cell
# gives the same Q3 result but scans every column for nothing.
q3_answers = df_mcr["Q3"]
df_country = q3_answers.where(
    q3_answers != "I do not wish to disclose my location"
)[1:]
c = df_country.value_counts()

# Semi-transparent teal bars.
country_bar = go.Bar(
    x=c.index,
    y=c.values,
    orientation="v",
    marker=dict(color='#20716A', opacity=0.5),
)

# A plain list of traces is used instead of the deprecated go.Data wrapper.
data = [country_bar]
layout = go.Layout(height=900, title="Countries of the survey's participants")
figure = go.Figure(data=data, layout=layout)
py.iplot(figure)
# Degree distribution, drawn with matplotlib through pandas.
# Open a 12x12 inch figure before plotting.
plt.figure(figsize=(12, 12))

# Q4 answers without the first row and without missing values.
df_degree = df_mcr["Q4"][1:].dropna()

# One bar per distinct answer, with the bar height being its count.
degree_counts = df_degree.value_counts()
degree_counts.plot.bar()
# Undergraduate major: count the Q5 answers, skipping the first row.
field_of_study = df_mcr["Q5"][1:].value_counts()

# Pie trace with a 0.5 hole; slice text is placed inside the slices.
major_trace = {
    "type": "pie",
    "labels": field_of_study.index,
    "values": field_of_study.values,
    "hole": 0.5,
    "textposition": "inside",
}

# Text annotation drawn without an arrow, in a 25-point font.
major_note = {
    "text": "Field of Study",
    "showarrow": False,
    "font": {"size": 25},
}

# The figure is passed to plotly as a plain dict specification.
fig = {
    "data": [major_trace],
    "layout": {
        'title': "Undergraduate Major",
        "annotations": [major_note],
    },
}
py.iplot(fig)
What we can conclude from the graph above is the diversity of the domains that use Data Science as an analytical tool. In business, for example, algorithms are used to predict stock prices and detect fraud. In medicine, the University of California San Diego's data science team implemented an optimized deep learning neural network model to detect anomalies in the human eye from a picture alone. In astronomy and physics, CERN is the best example of the importance of data in such fields: the LHC collects petabytes of data from the thousands of particle collisions that happen every second in its core.
# Distribution of the Q6 answers, skipping the first row.
fs = df_mcr["Q6"][1:].value_counts()

# A plain list of traces is used instead of the deprecated go.Data wrapper.
data = [go.Bar(x=fs.index, y=fs.values, orientation="v")]

# NOTE(review): "Field of Study" is also the label on the Q5 chart — confirm
# that this title really describes what Q6 asks.
layout = go.Layout(title="Field of Study", height=500)
fig = go.Figure(data=data, layout=layout)
py.iplot(fig)